* Reset the session: drop stored matrices, then clear the data in memory.
clear matrix
clear
* Request 10g of memory for the data.
* NOTE(review): presumably only needed on older Stata releases - confirm against the version in use.
set mem 10g

*Set root data directory
* The local is left empty here and has to be filled in before running.
* Later paths append names such as round38 directly to it, so the value needs to end in a path separator.
local rootdir 
cd "`rootdir'"


*** NIC98 (2-digit) to NIC87 (1-digit) concordance
* Truncate both codes, swap the truncated versions in for the originals and
* keep the first record for each 2-digit NIC98 code.
use nic98_87, clear
generate nic98_short = substr(nic98, 1, 2)
generate nic87_short = substr(nic87, 1, 1)
drop nic98 nic87
rename nic98_short nic98
rename nic87_short nic87

* A NIC98 code that maps to several NIC87 codes has no tie-breaking rule:
* whichever record comes first in the file is the one that survives.
duplicates drop nic98, force
save nic98_87_div, replace

*** NIC04 (2-digit) to NIC98 (2-digit) concordance
* Same procedure, keeping the first record for each 2-digit NIC04 code.
use nic04_98, clear
generate nic98_short = substr(nic98, 1, 2)
generate nic04_short = substr(nic04, 1, 2)
drop nic98 nic04
rename nic98_short nic98
rename nic04_short nic04
duplicates drop nic04, force
save nic04_98_div, replace






*********38th round**************
* Work from the round38 subdirectory of the root data directory.
cd "`rootdir'round38\"

***NOTE: raw ASCII (.dat) has already been read into Stata format using the .dct and .do files provided (from Topalova (2005))
*d144h d156p 156a    household, person, activity   for r(ural) and u(rban)
*.dct files and .do files        
*outputs are same name .dta files


***Household file
* Stack the urban and rural household files into one file.
use d144hu, clear
append using d144hr
ren statereg38 region
* segment and substratum are created as the constant 1; both are kept as identifier variables further down.
gen segment=1
gen substratum=1
* Keep one record per household key. force drops the extra records even when they differ on other variables.
duplicates drop sector subround subsample region fsu hhno, force
save d144h, replace

***Person file
* Stack the rural and urban person files, then keep the identifier range, the domestic-duty range
* and the listed demographic and work variables.
use d156pr, clear
append using d156pu
keep sector-srl_n_m domduty-assist_code rel_to_hd sex age gen_edu status_wa w_nic w_nco pr_nic pr_nco pr_status sub_nic sub_nco sub_status
ren statereg38 region
gen segment=1
gen substratum=1

***note that poultry and dairy are separate here. let's make them the same
* The renames below give the domestic-duty variables the same names that the round 43 section of this script uses.
ren yesdomduty_why domreas
ren nodomduty_why domreas2
ren kitchen garden
* poultry starts at 2 and becomes 1 when either the raw poultry variable (spelled poulty in the raw file) or dairy equals 1.
* NOTE(review): presumably 1 means yes and 2 means no - confirm against the codebook.
gen poultry=2
replace poultry=1 if poulty==1
replace poultry=1 if dairy==1
*ren poultrydairy poultry
drop poulty dairy
ren fish_veg collectfood
ren firewood collectwood
ren husking huskpaddy
ren grinding grindgrain
ren gur prepgur

* Quick look at two of the renamed variables.
summ huskpaddy grindgrain

*missing
*ren presfish presfood
* presfood and baskets are created as all-missing placeholders in this round.
gen presfood=.
*weaving is included in sewing, presumably basket making is weaving?
gen baskets=.

ren sewing sew
ren tutoring tutor
ren bringwater_out collectwater
ren bringwater_outvil outsidewater
ren bringwater_km waterdist
ren want_work acceptwork
ren wrkacc_nature natwork
ren wrkacc_type typework
ren skill_code reqskill
ren cow_dung prepdung
ren assist_code assreq

* counter = number of person records sharing one household key and member serial number
* maxcounter = the largest such count within the household
bysort sector subround subsample region fsu hhno srl_n_m: gen counter=_N
bysort sector subround subsample region fsu hhno: egen maxcounter=max(counter)
tab maxcounter
* runningcounter numbers the members of each household in age, sex, rel_to_hd order.
sort sector subround subsample region fsu hhno age sex rel_to_hd
by sector subround subsample region fsu hhno: gen runningcounter=_n
*relabeling members if maxcounter>1, in age/sex/rel_to_hd order
replace srl_n_m=runningcounter if maxcounter>1
*make sure to do the same for activities!
* Only counter is dropped here; maxcounter and runningcounter stay in the saved file.
drop counter
drop if srl_n_m==.

save d156p, replace


***Activities
* First pass over the activity file: build a lookup from the original member serial number to a
* renumbered one for households in which a member serial number is shared by more than one person.
use d156au, clear
append using d156ar

* Remove records with specialcode 2 and records with zero days.
drop if specialcode == 2
drop if tot_day==0
ren statereg38 region

**how to get the same serial numbers after relabelling?
* Reduce to one row per household, serial number, sex, age and relation, then repeat the numbering
* used in the person file (age, sex, rel_to_hd order within household).
duplicates drop sector subround subsample region fsu hhno srl_n_m sex age rel_to_hd, force
bysort sector subround subsample region fsu hhno srl_n_m: gen counter=_N
bysort sector subround subsample region fsu hhno: egen maxcounter=max(counter)
tab maxcounter
sort sector subround subsample region fsu hhno age sex rel_to_hd
by sector subround subsample region fsu hhno: gen runningcounter=_n
*relabeling members if maxcounter>1, in age/sex/rel_to_hd order
gen srl_n_m_alt=runningcounter if maxcounter>1
* Only households with a shared serial number go into the lookup.
keep if maxcounter>1
keep sector subround subsample region fsu hhno age sex rel_to_hd srl_n_m srl_n_m_alt
sort sector subround subsample region fsu hhno age sex rel_to_hd srl_n_m
* NOTE(review): the numbering here runs over persons present in the activity file, while the person file
* numbers every person record - confirm that the two numberings agree.
save temp, replace


* Second pass over the activity file: reload it with the same filters and apply the serial number lookup.
use d156au, clear
append using d156ar
ren statereg38 region

drop if specialcode == 2
drop if tot_day==0

sort sector subround subsample region fsu hhno age sex rel_to_hd srl_n_m
* temp holds one row per key combination, so each activity record matches at most one lookup row.
merge m:1 sector subround subsample region fsu hhno age sex rel_to_hd srl_n_m using temp
tab _merge
* Matched records take the renumbered serial; all other records keep their original serial.
replace srl_n_m=srl_n_m_alt if _merge==3
drop srl_n_m_alt _merge


* Resolve activity records that share an activity serial number within a person.
* counter    = number of records with the same person and activity serial number
* maxcounter = the largest such count for the person
* runcounter = running record number within the person
* totdays    = sum of tot_day over all of the person's records
bysort sector subround subsample region fsu hhno srl_n_m srl_n_a: gen counter=_N
tab counter
bysort sector subround subsample region fsu hhno srl_n_m: egen maxcounter=max(counter)
bysort sector subround subsample region fsu hhno srl_n_m: gen runcounter=_n
bysort sector subround subsample region fsu hhno srl_n_m: egen totdays=total(tot_day)
*relabelling activities
* When a person's records add up to exactly 7 days they are all kept and simply renumbered 1, 2, 3, ...
replace srl_n_a=runcounter if totdays==7
drop counter maxcounter runcounter

* Recount after the renumbering to see which persons still have a shared activity serial number.
bysort sector subround subsample region fsu hhno srl_n_m srl_n_a: gen counter=_N
tab counter
bysort sector subround subsample region fsu hhno srl_n_m: egen maxcounter=max(counter)

*drop days duplicates
* Fixed: the original condition compared tot_day with a bare subscript instead of the next record's
* tot_day, and the missing parentheses let that clause apply even where maxcounter was 1.
* NOTE(review): this drops every record in a run of equal tot_day values, and the neighbouring record
* may belong to a different person - confirm that this is the intended rule.
drop if maxcounter>1 & (tot_day==tot_day[_n-1] | tot_day==tot_day[_n+1])
*keep more disaggregated
drop if maxcounter>1 & tot_day==7
drop maxcounter counter

* Final check of the remaining duplicates. totdays is left in the data at this point.
bysort sector subround subsample region fsu hhno srl_n_m srl_n_a: gen counter=_N
tab counter

//Generate round variable
* NOTE(review): round is not in the keep list on the next line, so it is dropped again right away;
* it is generated a second time after the variable selection below.
gen round = 38

keep sector subround subsample region fsu hhno srl_n_m srl_n_a status_da inddivcoded opcode tot_day wage_cash wage_kind wage_tot

* Attach the household file on the household key (many activity records per household).
merge m:1 sector subround subsample region fsu hhno using d144h
tab _merge
drop _merge

* Attach the person file on the household key plus member serial number.
merge m:1 sector subround subsample region fsu hhno srl_n_m using d156p
tab _merge
drop _merge

* Rows without an activity or member serial number are removed.
drop if srl_n_a==.
drop if srl_n_m==.

//convert land measurements from acres to hectares
ren hhsz hhsize
ren hhtypecode hhtype
ren hhreligion religion
* land is hhlandp times 0.404, capped at 99.99 for non-missing values.
gen land=hhlandp * 0.404
replace land=99.99 if land>99.99 & land~=.

* Repeat of the two drops above; no rows were added in between.
drop if srl_n_a==.
drop if srl_n_m==.
gen mult=mult_comb

//CHANGE THE FOLLOWING TO ADD MORE VARIABLES (YOU MAY NEED TO MERGE WITH ADDITIONAL RAW DATA FILES FOR SOME VARIABLES)
//Keep variables of interest
* The range domduty-baskets depends on the variable order inherited from the person file.
#delimit ;
keep sector subround subsample region fsu hhno segment substratum srl_n_m srl_n_a /*identifier vars*/
     hhsize hh_nic hh_nco hhtype religion hhgroup land pce /*demographic vars*/
	 rel_to_hd sex age gen_edu
	 status_da status_wa pr_status sub_status w_nic w_nco opcode pr_nic pr_nco sub_nic sub_nco inddivcoded tot_day /*activity vars*/
	 wage_cash wage_kind wage_tot /*wage vars*/
	 /*geographic vars*/
	 mult /*multiplier vars*/
	 domduty-baskets
	 ;
#delimit cr

gen round=38
* Missing subsample is set to 7 before it is used in the person id.
replace subsample=7 if subsample==.
* id identifies a person: household key plus member serial number.
egen id=group(sector subround subsample region fsu hhno srl_n_m)

* Keep one record per person and activity serial number.
duplicates drop id srl_n_a, force



***relabel activities, dropping zeros
* Renumber each person's activities so that the activity with the most days becomes number 1.
* Missing days are treated as zero for the ranking and set back to missing afterwards
* (which also turns any genuine zero into missing).
replace tot_day=0 if tot_day==.

* Fixed: the original sorted on id and tot_day only, so activities with equal days were ordered
* at random and the new numbering could differ from run to run. The old activity serial number,
* which is unique within id after the duplicates drop above, now breaks ties.
tempvar negdays
gen double `negdays'=-tot_day
sort id `negdays' srl_n_a
by id: gen counter=_n

replace tot_day=. if tot_day==0

replace srl_n_a=counter
drop counter `negdays'


//combine multiple activities per individual into single observation
* After the reshape each person has one row, with the activity variables suffixed by the activity number.
reshape wide status_da opcode inddivcoded tot_day ///
			 wage_cash wage_kind wage_tot, i(id) j(srl_n_a)

			 
* Attach the household consumption file. The key here leaves out subsample.
merge m:1 sector subround region fsu hhno using "`rootdir'round38_edit"
tab _merge		 
**note the loss of about 5.6% of matches when merging to consumption data, no known solution
		 

* Fill identifier values again for any rows where they are missing or differ from the round 38 constants.
replace subsample=7 if subsample==.
replace segment=1 if segment~=1
replace substratum=1 if substratum~=1

* Rename the consumption variables to the names used for the output file.
ren xgrains grains
ren xpulses pulses
ren xmilk milk
ren xoil oils
ren xveg veg
ren xfru fru
ren xmeat meat
* food_other is missing whenever any of its three components is missing.
gen food_other=xbev+xproc+xsugar
label var food_other "other food, inc. sugar, spices, processed foods"
ren xintox intox
ren xlight light
ren xent entertain
ren xeduc education
*definition different?
ren xmed medical
ren xnd nondurables
ren xtrans transport
ren xactrent rent
ren xser services
label var services "other consumer services"
*combined clothing and footwear
gen clothing=xcloth
ren xdur durables
ren xtax tax
* round already exists at this point, so the captured gen has no effect.
cap: gen round=38



#delimit ;
keep round sector subround subsample region fsu hhno segment substratum srl_n_m/*identifier vars*/
     hhsize hh_nic hh_nco hhtype religion hhgroup land pce /*demographic vars*/
	 rel_to_hd sex age gen_edu
	 	 hh_nic w_nic pr_nic sub_nic status_wa pr_status sub_status hh_nco w_nco pr_nco sub_nco
hh_nco w_nco pr_nco sub_nco
status_da* opcode* inddivcoded* tot_day*
			 wage_cash* wage_kind* wage_tot* /*activity codes*/
 /*geographic vars*/
	 mult /*multiplier vars*/
	 domduty-baskets /*domestic variables*/
	 grains pulses milk oils veg fru meat food_other
	intox light entertain education medical nondurables transport rent
	services clothing durables tax /*consumption variables*/
	 ;
#delimit cr

**expenditure and household size consistency checks
* Missing expenditure components are set to zero so that the sums below are never missing.
foreach j in rent durables education entertain fru grains intox light meat medical milk nondurables oils pulses services transport veg food_other clothing tax{
replace `j'=0 if `j'==.
}

*exclude rent and tax from total expenditure
gen totexp=durables+education+entertain+fru+grains+intox+light+meat+medical+milk+nondurables+oils+pulses+services+transport+veg+food_other+clothing

* xfood = food spending; wfood = food share of total spending; wgrains = grain share of food spending
gen xfood=fru+grains+meat+milk+oils+pulses+veg+food_other
gen wfood=xfood/totexp
gen wgrains=grains/xfood

capture: drop _merge
***day re-scaling/cleaning
***almost no one with more than four activities, we will censor it there and rescale days
* Drops every variable whose name contains a 5, 6 or 7.
* NOTE(review): unlike the round 43 section this drop is not captured, so it stops with an error
* if no variable name matches one of the patterns - confirm that is acceptable.
drop *5* *6* *7*

forvalues j=1(1)4{
replace tot_day`j'=0 if tot_day`j'==.
}

* Rescale the four remaining activities so that their days add up to 7.
* When totaldays is zero the division yields missing days.
gen totaldays=tot_day1+tot_day2+tot_day3+tot_day4
*bysort id: egen totaldays=total(tot_day)
forvalues j=1(1)4{
replace tot_day`j'=tot_day`j'*(7/totaldays)
}
drop totaldays




* Convert each industry code from NIC70 to NIC87.
* For every code variable the original value is kept under the same name with the suffix 70,
* and the variable itself ends up holding the converted code.
* Fixed: both lookups now use keep(master match). Without it, crosswalk rows with no counterpart
* in the data were appended as extra, otherwise empty observations on every pass through the loop.
foreach j in hh_nic w_nic pr_nic sub_nic{
ren `j' `j'70
* Look up the first two characters of the NIC70 code in the 2-digit crosswalk.
gen nic70=substr(`j'70,1,2)
sort nic70
merge m:1 nic70 using "`rootdir'nic70_87_2digit", keep(master match)
drop nic70 _merge
* Second lookup: where the crosswalk result has an entry in nic87_87_2digit, replace it with that entry.
ren nic87 nic87mult
merge m:1 nic87mult using "`rootdir'nic87_87_2digit", keep(master match)
replace nic87mult=nic87 if _merge==3
drop nic87 _merge
ren nic87mult `j' 
}


***status code conversion
*Leah's correction
* sick flags persons whose weekly status code is 99; it is set before the codes are recoded below.
gen sick=0
replace sick=1 if status_wa==99
*status_wa==61 | status_wa==71 | missing from this round

* Recode every status variable. holder_ keeps a copy of the original code, so each replace
* tests the value before recoding and the order of the lines does not matter.
* Codes 1 to 4 become 11, 94 becomes 97, 96 becomes 94, 97 becomes 96, and 98 and 99 become 97.
foreach j in pr_status sub_status status_da1 status_da2 status_da3 status_da4 status_wa{
gen holder_`j' = `j'
replace `j'=11 if holder_`j'==1 | holder_`j'==2 | holder_`j'==3 | holder_`j'==4

replace `j'=97 if holder_`j'==94
replace `j'=94 if holder_`j'==96
*replace `j'=101 if holder_`j'==95
replace `j'=96 if holder_`j'==97
replace `j'=97 if holder_`j'==98
replace `j'=97 if holder_`j'==99
*too young=other, too old=disabled
*replace `j'=97 if `j'==100
*replace `j'=95 if `j'==101
}



***education conversion - do we want years of schooling for children?
* Codes 7, 8 and 9 are merged into 6 and code 10 becomes 0.
* The variable is set to missing for persons under 18 and for persons with missing age or sex.
gen generaledmod=gen_edu
replace generaledmod=6 if generaledmod==9 | generaledmod==7 | generaledmod==8
replace generaledmod=0 if generaledmod==10
replace generaledmod=. if age<18 | age==. | sex==.


***hh type/group/religion conversion
*note that there are only two household types in round 38 - self-employed and other!   in all later rounds 9 gets split into 2(regular salary) 3(casual labor) 9(other)
* NOTE(review): the note above talks about household type, but the two lines below recode hhgroup - confirm which variable is meant.
replace hhgroup=9 if hhgroup==0
replace hhgroup=9 if hhgroup==3

ren status_wa w_status

* Final variable selection for the round 38 person file; the holder_ copies are dropped here.
#delimit ;
keep round sector subround subsample region fsu hhno segment substratum srl_n_m /*identifier vars*/
     hhsize hh_nic hh_nco hhtype religion hhgroup land pce /*demographic vars*/
	 rel_to_hd sex age generaledmod
	 	sick 	 
			 hh_nic70 w_nic70 pr_nic70 sub_nic70 
			 hh_nic w_nic pr_nic sub_nic 
			 w_status pr_status sub_status 
			 hh_nco w_nco pr_nco sub_nco
			 tot_day1-tot_day4 status_da1-status_da4 inddivcoded1-inddivcoded4 opcode1-opcode4 wage_tot1-wage_tot4
 /*activity vars*/
	 /*geographic vars*/
	 mult /*multiplier vars*/
	 domduty-baskets /*domestic variables*/
	 grains pulses milk oils veg fru meat food_other
	intox light entertain education medical nondurables transport rent
	services clothing durables tax 
	totexp xfood wfood wgrains 
	/*consumption variables*/
	 ;
#delimit cr


* Set round on every row, including rows where it is still missing.
cap: replace round=38

save "`rootdir'round38_sch10pers", replace














********Round 43**************
* Work from the round43 subdirectory of the root data directory.
cd  "`rootdir'round43\"

**NOTE: data already read from .dat/ascii format to Stata using
**.do and .dct files from Topalova (2005) provided in directory

*e3dhf e3dpf e3daf    household, person, activity for r(ural) and u(rban)
*.dct files and .do files  
*outputs are same name .dta files


**household data
* Stack the urban and rural household files into one file.
use e3dhfu.dta, clear
append using e3dhfr.dta
ren statereg43 region

ren assist_irdp irdp
ren males_wrk publicworks_male
ren females_wrk publicworks_female

* Keep one record per household key, which in this round includes substratum.
duplicates drop sector subround subsample region fsu substratum hhno, force
save e3dhf.dta, replace


***Person data

* Stack the rural and urban person files and keep the identifier range, the domestic-duty range,
* the listed demographic and work variables and the district code.
use e3dpfr.dta, clear
append using e3dpfu.dta
keep sector-srl_n_m domduty-assist_code rel_to_hd sex age gen_edu status_wa w_nic w_nco pr_nic pr_nco pr_status sub_nic sub_nco sub_status districtcode

ren statereg43 region

* Rename the domestic-duty variables to the names used for the output file.
ren yesdomduty_why domreas
ren nodomduty_why domreas2
ren poultrydairy poultry
ren fish_veg collectfood
ren firewood collectwood
ren husking huskpaddy
ren grinding grindgrain
ren gur prepgur
ren presfish presfood
ren sewing sew
ren tutoring tutor
ren bringwater_out collectwater
ren bringwater_outvil outsidewater
ren bringwater_km waterdist
ren want_work acceptwork
ren wrkacc_nature natwork
ren wrkacc_type typework
ren skill_code reqskill
ren assist_code assreq

* counter = number of records sharing one household key and member serial number.
* Where a serial number appears twice, the record with missing domduty is dropped.
bysort sector subround subsample region fsu substratum hhno srl_n_m: gen counter=_N
tab counter 
drop if counter==2 & domduty==.
drop counter
save e3dpf, replace


***activity data


* Stack the urban and rural activity files, remove exact duplicate rows and records with specialcode 2.
use e3dafu.dta, clear
append using e3dafr.dta
duplicates drop
drop if specialcode==2

ren statereg43 region

* counter = number of records with the same person and activity serial number
* maxcounter = the largest such count for the person
bysort sector subround subsample region fsu substratum hhno srl_n_m srl_n_a: gen counter=_N
tab counter
bysort sector subround subsample region fsu substratum hhno srl_n_m: egen maxcounter=max(counter) 
* For persons with a shared activity serial number, drop the records where nodays_nom is zero.
drop if nodays_nom==0 & maxcounter>1
drop counter maxcounter
bysort sector subround subsample region fsu substratum hhno srl_n_m srl_n_a: gen counter=_N
tab counter
bysort sector subround subsample region fsu substratum hhno srl_n_m : egen maxcounter=max(counter) 
bysort sector subround subsample region fsu substratum hhno srl_n_m: gen runcounter=_n
bysort sector subround subsample region fsu substratum hhno srl_n_m : egen totdays=total(tot_day)
*relabelling activities
* When a person's records add up to exactly 7 days they are renumbered 1, 2, 3, ... and all kept.
replace srl_n_a=runcounter if totdays==7
drop counter maxcounter runcounter
*what's left?
bysort sector subround subsample region fsu substratum hhno srl_n_m srl_n_a: gen counter=_N
bysort sector subround subsample region fsu substratum hhno srl_n_m: egen maxcounter=max(counter) 
tab maxcounter

**manually change one
* One hand correction of a member serial number, then any remaining duplicate keys are dropped.
replace srl_n_m=1 if fsu==12535 & hhno==3 & maxcounter>1 & sex==1
duplicates drop sector subround subsample region fsu substratum hhno srl_n_m srl_n_a, force
drop maxcounter counter totdays

keep sector subround subsample region fsu substratum hhno srl_n_m srl_n_a status_da inddivcoded opcode tot_day wage_cash wage_kind wage_tot districtcode

**to reshape, we have to drop households with missing srl_n_a (usually missing srl_n_m)
drop if srl_n_m==.
drop if srl_n_a==.

* Attach the household file on the household key.
merge m:1 sector subround subsample region fsu substratum hhno using e3dhf
tab _merge
drop _merge

* Attach the person file on the household key plus member serial number.
merge m:1 sector subround subsample region fsu substratum hhno srl_n_m using e3dpf
tab _merge
drop _merge

*summ
**to reshape, we have to drop households with missing srl_n_a (usually missing srl_n_m)
* The merges can add household or person rows that have no activity record; these are removed here.
drop if srl_n_m==.
drop if srl_n_a==.


//Create round identifier
gen round = 43

ren hhsz hhsize
* land is taken from hhlandp without a unit conversion in this round and capped at 99.99.
gen land=hhlandp
replace land=99.99 if land>99.99 & land~=.
ren hhtypecode hhtype
ren hhreligion religion
ren mult_comb mult

//CHANGE THE FOLLOWING TO ADD MORE VARIABLES (YOU MAY NEED TO MERGE WITH ADDITIONAL RAW DATA FILES FOR SOME VARIABLES)
//Keep variables of interest
* The range domduty-assreq depends on the variable order inherited from the person file.
#delimit ;
keep round sector subround subsample region fsu substratum hhno srl_n_m srl_n_a districtcode/*identifier vars*/
     hhsize land pce hhtype religion hhgroup
	 hh_nic hh_nco  /*demographic vars*/
	 rel_to_hd sex age gen_edu
	 status_da status_wa pr_status sub_status 
	 w_nic w_nco opcode pr_nic pr_nco sub_nic sub_nco inddivcoded tot_day /*activity vars*/
	 wage_cash wage_kind wage_tot /*wage vars*/
	 mult /*multiplier vars*/ 
	 domduty-assreq
	 irdp *publicworks*;
#delimit cr





//Create ID var for each individual (but not for each activity)
* id is the household group number times 100 plus the member serial number.
* NOTE(review): this stays unique only while srl_n_m is below 100 - confirm against the data.
egen long id = group(sector subround subsample region fsu substratum hhno)
replace id = (id * 100) + srl_n_m
label var id "Unique ID for each individual based on hhid and srl_n_m"

* Keep one record per person and activity serial number.
duplicates drop id srl_n_a, force

***relabel activities, dropping zeros
* Renumber each person's activities so that the activity with the most days becomes number 1.
* Missing days are treated as zero for the ranking and set back to missing afterwards
* (which also turns any genuine zero into missing).
replace tot_day=0 if tot_day==.

* Fixed: the original sorted on id and tot_day only, so activities with equal days were ordered
* at random and the new numbering could differ from run to run. The old activity serial number,
* which is unique within id after the duplicates drop above, now breaks ties.
tempvar negdays
gen double `negdays'=-tot_day
sort id `negdays' srl_n_a
by id: gen counter=_n

replace tot_day=. if tot_day==0

replace srl_n_a=counter
drop counter `negdays'


*need to eliminate districts that are different within an ID?!
* maxdist and mindist differ exactly when a person's records carry more than one district code.
bysort id: egen maxdist=max(districtcode)
bysort id: egen mindist=min(districtcode)

* For those persons, copy the district code from the previous row when that row has the same fsu.
* The statement is repeated four times, then three times in the other direction using the next row.
* NOTE(review): the neighbouring row only has to share the fsu, not the id - confirm that this is intended.
replace districtcode=districtcode[_n-1] if maxdist~=mindist & fsu==fsu[_n-1]
replace districtcode=districtcode[_n-1] if maxdist~=mindist & fsu==fsu[_n-1]
replace districtcode=districtcode[_n-1] if maxdist~=mindist & fsu==fsu[_n-1]
replace districtcode=districtcode[_n-1] if maxdist~=mindist & fsu==fsu[_n-1]


replace districtcode=districtcode[_n+1] if maxdist~=mindist & fsu==fsu[_n+1]
replace districtcode=districtcode[_n+1] if maxdist~=mindist & fsu==fsu[_n+1]
replace districtcode=districtcode[_n+1] if maxdist~=mindist & fsu==fsu[_n+1]

drop mindist maxdist

* Fill missing district codes with the smallest district code found in the same region and fsu.
bysort region fsu: egen mindistcode=min(districtcode)
replace districtcode=mindistcode if districtcode==.



*summ
//combine multiple activities per individual into single observation
* After the reshape each person has one row, with the activity variables suffixed by the activity number.
* NOTE(review): reshape needs every other variable, districtcode included, to be constant within id.
reshape wide status_da opcode inddivcoded tot_day ///
			 wage_cash wage_kind wage_tot, i(id) j(srl_n_a)

			 
* Attach the household consumption file on the full household key.
* The merge result is not tabulated here; _merge is dropped further down.
merge m:1 sector subround subsample region fsu substratum hhno using "`rootdir'round43_edit"

* Rename the consumption variables to the names used for the output file.
ren xgrains grains
ren xpulses pulses
ren xmilk milk
ren xoil oils
ren xveg veg
ren xfru fru
ren xmeat meat
* food_other is missing whenever any of its three components is missing.
gen food_other=xbev+xproc+xsugar
label var food_other "other food, inc. sugar, spices, processed foods"
ren xintox intox
ren xlight light
ren xent entertain
ren xeduc education
*definition different?
ren xmed medical
ren xnd nondurables
ren xtrans transport
ren xactrent rent
ren xser services
label var services "other consumer services"
*combined clothing and footwear
gen clothing=xcloth
ren xdur durables

*so far tax appears to be missing from original data
ren xtax tax


**expenditure and household size consistency checks
* Missing expenditure components are set to zero so that the sums below are never missing.
foreach j in rent durables education entertain fru grains intox light meat medical milk nondurables oils pulses services transport veg food_other clothing tax{
replace `j'=0 if `j'==.
}

* Remove any existing totexp and xfood before rebuilding them; rent and tax are not part of totexp.
cap: drop totexp
cap: drop xfood
gen totexp=durables+education+entertain+fru+grains+intox+light+meat+medical+milk+nondurables+oils+pulses+services+transport+veg+food_other+clothing


* xfood = food spending; wfood = food share of total spending; wgrains = grain share of food spending
gen xfood=fru+grains+meat+milk+oils+pulses+veg+food_other
gen wfood=xfood/totexp
gen wgrains=grains/xfood


capture: drop _merge
***day re-scaling/cleaning
***almost no one with more than four activities, we will censor it there and rescale days
* Drops every variable whose name contains a 5, 6 or 7; captured so that a pattern with no match does not stop the run.
cap: drop *5*
cap: drop *6*
cap: drop *7*
forvalues j=1(1)4{
replace tot_day`j'=0 if tot_day`j'==.
}
* Rescale the four remaining activities so that their days add up to 7.
* When totaldays is zero the division yields missing days.
gen totaldays=tot_day1+tot_day2+tot_day3+tot_day4
forvalues j=1(1)4{
replace tot_day`j'=tot_day`j'*(7/totaldays)
}
drop totaldays




* Convert each industry code from NIC70 to NIC87.
* For every code variable the original value is kept under the same name with the suffix 70,
* and the variable itself ends up holding the converted code.
* Fixed: both lookups now use keep(master match). Without it, crosswalk rows with no counterpart
* in the data were appended as extra, otherwise empty observations on every pass through the loop.
foreach j in hh_nic w_nic pr_nic sub_nic{
ren `j' `j'70
* Look up the first two characters of the NIC70 code in the 2-digit crosswalk.
gen nic70=substr(`j'70,1,2)
sort nic70
merge m:1 nic70 using "`rootdir'nic70_87_2digit", keep(master match)
drop nic70 _merge
* Second lookup: where the crosswalk result has an entry in nic87_87_2digit, replace it with that entry.
ren nic87 nic87mult
merge m:1 nic87mult using "`rootdir'nic87_87_2digit", keep(master match)
replace nic87mult=nic87 if _merge==3
drop nic87 _merge
ren nic87mult `j' 
}




**status code conversion
* sick flags persons whose weekly status code is 98, 61 or 71; it is set before the codes are recoded below.
gen sick=0
replace sick=1 if status_wa==98 | status_wa==61 | status_wa==71
* | missing from this round

* Recode every status variable in place: 62 becomes 61, 72 becomes 71, 98 becomes 97 and 99 becomes missing.
foreach j in status_da1 status_da2 status_da3 status_da4 status_wa pr_status sub_status{
replace `j'=61 if `j'==62
replace `j'=71 if `j'==72
replace `j'=97 if `j'==98
replace `j'=. if `j'==99
}
**what is 99 here? -- too young (or old?)


***education
* Codes 7, 8 and 9 are merged into 6 and code 10 becomes 0.
* The variable is set to missing for persons under 18 and for persons with missing age or sex.
gen generaledmod=gen_edu
replace generaledmod=6 if generaledmod==9 | generaledmod==7 | generaledmod==8
replace generaledmod=0 if generaledmod==10
replace generaledmod=. if age<18 | age==. | sex==.
ren cow_dung prepdung

ren status_wa w_status
* segment is created as the constant 1 unless it already exists.
cap: gen segment=1


* Final variable selection for the round 43 person file.
#delimit ;
keep round sector subround subsample region fsu hhno segment substratum srl_n_m districtcode/*identifier vars*/
     hhsize hh_nic hh_nco hhtype religion hhgroup land pce /*demographic vars*/
	 rel_to_hd sex age generaledmod
			sick
			hh_nic70 w_nic70 pr_nic70 sub_nic70 
	 	 	 hh_nic w_nic pr_nic sub_nic 
			 w_status pr_status sub_status 
			 hh_nco w_nco pr_nco sub_nco
			 tot_day1-tot_day4 status_da1-status_da4 inddivcoded1-inddivcoded4 opcode1-opcode4 wage_tot1-wage_tot4
 /*activity vars*/
	 /*geographic vars*/
	 mult /*multiplier vars*/
	 domduty-assreq prepdung /*domestic variables*/
	 grains pulses milk oils veg fru meat food_other
	intox light entertain education medical nondurables transport rent
	services clothing durables tax 
	totexp xfood wfood wgrains 
	irdp *publicworks*
	/*consumption variables*/
	 ;
#delimit cr


* Set round on every row, including rows where it is still missing.
cap: replace round=43

save "`rootdir'round43_sch10pers", replace










*******55th round**********
cd "`rootdir'round55\"

* Multipliers: read each of the four RU dictionary files, keep the level 1 records
* and the multiplier and segment count variables, and save one .dta per file.
forvalues q = 1/4 {
	infile using "55mult_RU`q'.dct", clear
	keep if level==1
	keep round-fsu mult211 mult212 mult221 mult222 mult219 mult229 seg1sss1-seg2sss2
	drop district
	save "55mult_RU`q'.dta", replace
}

* Stack the four files in numeric order, drop two unused columns and save sorted by subround and fsu.
use "55mult_RU1.dta", clear
forvalues q = 2/4 {
	append using "55mult_RU`q'.dta"
}
drop sched centralstate

sort subround fsu

save "55mult.dta", replace



// Level 1 (household data): read the eight dictionary files (R1-R4, then U1-U4),
// keep the level 1 records and save one .dta per file.
foreach sec in R U {
	forvalues q = 1/4 {
		infile using "55lvl1_`sec'`q'.dct", clear
		keep if level == 1
		save "55lvl1_`sec'`q'.dta", replace
	}
}

// Stack the eight files in the same order and save the combined file.
use "55lvl1_R1.dta", clear
foreach part in R2 R3 R4 U1 U2 U3 U4 {
	append using "55lvl1_`part'.dta"
}
capture: drop blank
save "55lvl1.dta", replace

//load level 2 (loan data?)

// Level 3 (household roster data): read the eight dictionary files (R1-R4, then U1-U4),
// keep the level 3 records and save one .dta per file.
foreach sec in R U {
	forvalues q = 1/4 {
		infile using "55lvl3_`sec'`q'.dct", clear
		keep if level == 3
		save "55lvl3_`sec'`q'.dta", replace
	}
}

// Stack the eight files in the same order and save the combined file.
use "55lvl3_R1.dta", clear
foreach part in R2 R3 R4 U1 U2 U3 U4 {
	append using "55lvl3_`part'.dta"
}
capture: drop blank
save "55lvl3.dta", replace

// Level 4 (principal employment data): read the eight dictionary files (R1-R4, then U1-U4),
// keep the level 4 records and save one .dta per file.
foreach sec in R U {
	forvalues q = 1/4 {
		infile using "55lvl4_`sec'`q'.dct", clear
		keep if level == 4
		save "55lvl4_`sec'`q'.dta", replace
	}
}

// Stack the eight files in the same order and save the combined file.
use "55lvl4_R1.dta", clear
foreach part in R2 R3 R4 U1 U2 U3 U4 {
	append using "55lvl4_`part'.dta"
}
capture: drop blank
save "55lvl4.dta", replace

// Level 5 (subsidiary employment data): read the eight dictionary files (R1-R4, then U1-U4),
// keep the level 5 records and save one .dta per file.
foreach sec in R U {
	forvalues q = 1/4 {
		infile using "55lvl5_`sec'`q'.dct", clear
		keep if level == 5
		save "55lvl5_`sec'`q'.dta", replace
	}
}

// Stack the eight files in the same order and save the combined file.
use "55lvl5_R1.dta", clear
foreach part in R2 R3 R4 U1 U2 U3 U4 {
	append using "55lvl5_`part'.dta"
}
capture: drop blank
save "55lvl5.dta", replace

// Level 6 (activity data): read the eight dictionary files (R1-R4, then U1-U4),
// keep the level 6 records and save one .dta per file.
foreach sec in R U {
	forvalues q = 1/4 {
		infile using "55lvl6_`sec'`q'.dct", clear
		keep if level == 6
		save "55lvl6_`sec'`q'.dta", replace
	}
}

// Stack the eight files in the same order and save the combined file.
use "55lvl6_R1.dta", clear
foreach part in R2 R3 R4 U1 U2 U3 U4 {
	append using "55lvl6_`part'.dta"
}
capture: drop blank
save "55lvl6.dta", replace

// Level 11 (consumption data): read the eight dictionary files (R1-R4, then U1-U4),
// keep the level 11 records and save one .dta per file.
foreach sec in R U {
	forvalues q = 1/4 {
		infile using "55lvl11_`sec'`q'.dct", clear
		keep if level == 11
		save "55lvl11_`sec'`q'.dta", replace
	}
}

// Stack the eight files in the same order. The combined data stay in memory
// for the reshape that follows and are not saved at this point.
use "55lvl11_R1.dta", clear
foreach part in R2 R3 R4 U1 U2 U3 U4 {
	append using "55lvl11_`part'.dta"
}



//reshape level 11
// Turn the consumption records (one row per household and item code srl_n_c) into one row per
// household with one valcons_ column per item code.
drop wrkfileid filler1 filler2 blank other_id updatecode

egen id = group(statereg55 subround subsample flot fsu segment ssstrat hhno)
rename valcons valcons_
// Fixed: the stub has to be the renamed variable valcons_. The original passed valcons, which no
// longer exists after the rename, and every line below reads columns named valcons_ plus the item code.
reshape wide valcons_, i(id) j(srl_n_c)

//aggregate consumption data and convert annual to monthly
// Items 1301, 1302, 1400, 1500 and 1699 are multiplied by 30/365; all other items are used as recorded.
// A sum is missing whenever one of its components is missing.
gen grains = valcons_100
gen pulses = valcons_200
gen milk = valcons_300
gen oils = valcons_400
gen veg = valcons_500
gen fru = valcons_600
gen meat = valcons_700
gen food_other = valcons_800		
label var food_other "other food, inc. sugar, spices, processed foods"
gen intox = valcons_900
gen light = valcons_1000
gen entertain = valcons_1201 + valcons_1203
gen education = valcons_1202 + (valcons_1301 * (30/365))
gen medical = valcons_1204 + (valcons_1302 * (30/365))
gen nondurables = valcons_1205
gen transport = valcons_1206
gen rent = valcons_1207
gen services = valcons_1208 	//other services
label var services "other consumer services"
gen clothing = valcons_1400 * (30/365)
gen footwear = valcons_1500 * (30/365)
gen durables = valcons_1699 * (30/365)


// Drop the item-level columns now that the aggregates exist.
drop valcons_100 valcons_200 valcons_300 valcons_400 valcons_500 valcons_600 valcons_700 valcons_800 valcons_900 valcons_1000 valcons_1100 valcons_1201 ///
 valcons_1202 valcons_1203 valcons_1204 valcons_1205 valcons_1206 valcons_1207 valcons_1208 valcons_1209 valcons_1301 valcons_1302 valcons_1309 valcons_1400 ///
 valcons_1500 valcons_1601 valcons_1602 valcons_1603 valcons_1604 valcons_1605 valcons_1606 valcons_1607 valcons_1608 valcons_1609 valcons_1610 valcons_1699 ///
 valcons_1700 valcons_1800 valcons_1900 

capture: drop blank
save "55lvl11.dta", replace









// Level 10 (house work data): read the eight dictionary files (R1-R4, then U1-U4),
// keep the level 10 records and save one .dta per file.
foreach sec in R U {
	forvalues q = 1/4 {
		infile using "55lvl10_`sec'`q'.dct", clear
		keep if level == 10
		save "55lvl10_`sec'`q'.dta", replace
	}
}

// Stack the eight files in the same order and save the combined file.
use "55lvl10_R1.dta", clear
foreach part in R2 R3 R4 U1 U2 U3 U4 {
	append using "55lvl10_`part'.dta"
}
capture: drop blank
save "55lvl10.dta", replace






//remove duplicates before merge
*level1 ok


* Level 3 (roster): make the person key unique before it is used as the using file of an m:1 merge.
use "55lvl3.dta", clear

* counted = number of roster records sharing the full person key
bysort round sched statereg55 subround subsample flot fsu visit segment ssstrat hhno srl_n_m: gen counted=_N
tab counted
*browse if counted>1

*replace error srlnumber
* NOTE(review): every record with sex 2 among the doubled keys gets serial number 309; this looks like a
* hand correction for one known case - confirm that it cannot hit other households.
replace srl_n_m=309 if counted==2 & sex==2
drop counted

duplicates drop round sched statereg55 subround subsample flot fsu visit segment ssstrat hhno srl_n_m, force
save "55lvl3_mod.dta", replace

*4 is ok

*5 sub_status_num is key, but still one perfect duplicate and one slight one

* Level 5 (subsidiary employment): keep only the records with sub_status_num equal to 1,
* then force the person key to be unique.
use "55lvl5.dta", clear
drop if sub_status_num~=1
bysort round sched statereg55 subround subsample flot fsu visit segment ssstrat hhno srl_n_m: gen counted=_N
tab counted
duplicates drop round sched statereg55 subround subsample flot fsu visit segment ssstrat hhno srl_n_m, force
save "55lvl5_mod.dta", replace

*6- multiple activities, ok
*10,11 ok

*6- multiple activities, ok
*10,11 ok





* start from the level 6 file and attach the other levels:
* level 1 and level 11 on the household key, levels 3, 4, 5 and 10 on the household key plus srl_n_m
local hhkey55 round sched statereg55 subround subsample flot fsu visit segment ssstrat hhno

use "55lvl6.dta", clear
merge m:1 `hhkey55' using "55lvl1.dta"
tab _merge
drop _merge
foreach persfile in 55lvl3_mod 55lvl4 55lvl5_mod 55lvl10 {
	merge m:1 `hhkey55' srl_n_m using "`persfile'.dta"
	tab _merge
	drop _merge
}
merge m:1 `hhkey55' using "55lvl11.dta"
tab _merge
drop _merge

drop filler*

rename nummaleswork publicworks_male
rename numfemaleswork publicworks_female

******merge 1,3,4,5,6,10,11
****round 55 is really missing the person file-- should add it in since it is everywhere else!


drop id
//combine multiple activities per individual into single observation
egen hhid= group(round sched statereg55 subround subsample flot fsu visit segment ssstrat hhno) 
egen id = group(round sched statereg55 subround subsample flot fsu visit segment ssstrat hhno srl_n_m)
*browse if id==id[_n-1] & prepgur~=prepgur[_n-1]
*fill in for now
*drop if srl_n_a==.
replace srl_n_a=1 if srl_n_a==.

*some variables not constant within id, but that is expected
*should those be reshaped?
drop recordnum a1 a2 a3 a4 a5 a6 a7 paymode ue7days updatecode dayswork

*tot_day seems to be off by factor of 10
replace tot_day=tot_day/10

*strip the round suffix from the code variables
foreach v in w_nic w_nco opcode inddivcoded pr_nic pr_nco sub_nic sub_nco{
ren `v'55 `v'
}

*fill blank codes from neighbouring rows of the same id: three rounds of
*(copy from the previous row, then copy from the next row)
sort id srl_n_a
foreach j in w_nic w_nco{
forvalues pass=1(1)3{
replace `j'=`j'[_n-1] if `j'=="" & id==id[_n-1]
replace `j'=`j'[_n+1] if `j'=="" & id==id[_n+1]
}
}

*same fill for the numeric status_wa
forvalues pass=1(1)3{
replace status_wa=status_wa[_n-1] if status_wa==. & id==id[_n-1]
replace status_wa=status_wa[_n+1] if status_wa==. & id==id[_n+1]
}

duplicates drop id srl_n_a, force

***relabel activities, dropping zeros
*number each person's activities by days worked, most days first (activity 1 = most days).
*rows with equal days are ordered by the original activity serial: a plain "sort id tot_day"
*leaves ties in arbitrary order, so the numbering could change from run to run.
replace tot_day=0 if tot_day==.
tempvar negdays
gen double `negdays'=-tot_day
sort id `negdays' srl_n_a
by id: gen counter=_n
drop `negdays'


replace tot_day=. if tot_day==0

replace srl_n_a=counter
drop counter

reshape wide status_da opcode inddivcoded tot_day wage_cash wage_kind wage_tot, i(id) j(srl_n_a)

****add multipliers
sort fsu
merge m:1 subround fsu using "55mult.dta"
tab _merge
* only rows found in both the data and the multiplier file are retained
keep if _merge==3
drop _merge

* choose mult2<segment><ssstrat> for each row; rows outside segment 1-2 / ssstrat 1-2 stay missing
gen mult_subrnd=.
forvalues seg=1/2 {
	forvalues sub=1/2 {
		replace mult_subrnd=mult2`seg'`sub' if segment==`seg' & ssstrat==`sub'
	}
}
summ id mult_subrnd

gen mult=mult_subrnd/800


***rename and keep

rename statereg55 region
rename ssstrat substratum
rename hhsz hhsize
rename hhreligion religion
rename district districtcode

* land: renamed from hhlandp and capped at 99.99 (missing values are left alone)
rename hhlandp land
replace land=99.99 if land>99.99 & land~=.

*note that these are missing for 55th round, we will create them but leave them blank for now
gen hh_nic=""
gen hh_nco=""

* same variable list as before, written with line continuations instead of a ; delimiter
keep sector subround subsample region fsu segment substratum hhno srl_n_m districtcode ///
	hhsize land pce hhtype religion hhgroup ///
	hh_nic hh_nco ///
	rel_to_hd sex age gen_edu ///
	status_wa pr_status sub_status ///
	w_nic w_nco pr_nic pr_nco sub_nic sub_nco ///
	status_da1-wage_tot5 /*activity vars*/ ///
	mult /*multiplier vars*/ ///
	domduty-assreq ///
	grains-durables ///
	publicworks*


gen tax=0

* fold footwear into clothing, counting a missing value of either as zero
foreach v in clothing footwear {
	replace `v'=0 if `v'==.
}
replace clothing=clothing+footwear
drop footwear


**expenditure and household size consistency checks
* missing category values are set to zero before the totals are formed
local spendcats durables education entertain fru grains intox light meat medical milk nondurables oils pulses services transport veg food_other clothing
foreach cat of local spendcats {
	replace `cat'=0 if `cat'==.
}

capture drop totexp
capture drop xfood

* total expenditure, food expenditure, food share of the total, grain share of food
gen totexp=durables+education+entertain+fru+grains+intox+light+meat+medical+milk+nondurables+oils+pulses+services+transport+veg+food_other+clothing
gen xfood=fru+grains+meat+milk+oils+pulses+veg+food_other
gen wfood=xfood/totexp
gen wgrains=grains/xfood

capture drop _merge

***day re-scaling/cleaning
***almost no one with more than four activities, we will censor it there and rescale days
* removes every variable whose name contains a 5, 6 or 7; capture ignores the error when none match
foreach digit in 5 6 7 {
	capture drop *`digit'*
}

forvalues k=1/4 {
	replace tot_day`k'=0 if tot_day`k'==.
}
* rescale so the four activities sum to 7 days;
* a row whose four day counts sum to zero ends up missing (division by zero yields missing)
gen totaldays=tot_day1+tot_day2+tot_day3+tot_day4
forvalues k=1/4 {
	replace tot_day`k'=tot_day`k'*(7/totaldays)
}
drop totaldays



**note that for 55th round only hh_nic, hh_nco are missing! not sure how they are constructed - based on maximum income over the last year, so can't calculate it exactly...
**generate a hh nic manually using primary activity of household head/serial number one


egen hhid=group(sector subround subsample region fsu substratum segment hhno)

*renumber members 1..N within the household, following the existing serial number.
*(a bare "bysort hhid:" leaves the order inside a household arbitrary, so the new
*serial numbers could differ from run to run)
bysort hhid (srl_n_m): gen hhcounter=_n
replace srl_n_m=hhcounter
drop hhcounter
summ srl_n_m


*household code = principal code of the head (rel_to_hd==1); when the head has none,
*the first member in serial order that has one. every member of the household receives
*the same value. the earlier forward-fill only reached rows placed after the head, so rows
*before the head picked up a different member's code. there is also no longer a cap of
*37 members per household.
foreach code in nic nco{
*donor priority: 0 for the head, otherwise the member serial; rows with a blank code cannot donate
gen long donor_rank=srl_n_m
replace donor_rank=0 if rel_to_hd==1
replace donor_rank=. if pr_`code'==""
*missing ranks sort last, so the first row of each household is the best available donor
sort hhid donor_rank srl_n_m
by hhid: replace hh_`code'=pr_`code'[1] if hh_`code'==""
drop donor_rank
}

*leave the data ordered by household and member
sort hhid srl_n_m







*convert each NIC98 code variable to NIC87: look up the first 4, then 3, then 2 characters in
*the concordance; the original code is kept as <name>98 and <name> becomes the first two
*characters of the matched nic87.
*keep(master match) stops merge from appending concordance rows that match no observation;
*without it every lookup added empty observations to the person file.
*NOTE(review): a later (shorter-prefix) match overwrites an earlier (longer-prefix) match - confirm this precedence is intended
foreach j in hh_nic w_nic pr_nic sub_nic{
gen temp`j'=""
foreach len in 4 3 2{
gen nic98=substr(`j',1,`len')
merge m:1 nic98 using "`rootdir'nic98_87", keep(master match)
replace temp`j'=nic87 if _merge==3
drop _merge nic98 nic87
}
ren `j' `j'98
gen `j'=substr(temp`j',1,2)
*drop temp`j'
}

***individual activity codes need to be converted too (to 1-digit for compatibility)
*convert to master, and then convert to 87 division codes...
*keep(master match) prevents unmatched concordance rows from being appended as empty observations
forvalues j=1(1)4{
ren inddivcoded`j' nic98
merge m:1 nic98 using "`rootdir'nic98_87_div", keep(master match)
drop _merge nic98
ren nic87 inddivcoded`j'
destring inddivcoded`j', replace
destring opcode`j', replace
*replace inddivcoded`j'=floor(inddivcoded`j'/10)
}


*sick flag from the weekly status, set before the status codes are collapsed below
gen sick=0
replace sick=1 if status_wa==61 | status_wa==71 | status_wa==98
*status code conversion
foreach j in status_da1 status_da2 status_da3 status_da4 status_wa pr_status sub_status{
replace `j'=11 if `j'==12
replace `j'=61 if `j'==62
replace `j'=71 if `j'==72
replace `j'=97 if `j'==99
replace `j'=97 if `j'==98
}

**education
*collapse gen_edu into categories 0-6; the steps run in ascending order so that a value
*recoded by one step is not picked up again by a later one
gen generaledmod=gen_edu
replace generaledmod=0 if generaledmod==1
replace generaledmod=1 if generaledmod==2 | generaledmod==3 | generaledmod==4
replace generaledmod=2 if generaledmod==5
replace generaledmod=3 if generaledmod==6
replace generaledmod=4 if generaledmod==7
*spell out the variable name instead of relying on the abbreviation "generaled"
replace generaledmod=5 if generaledmod==8 | generaledmod==9
*missing values compare greater than any number, so exclude them explicitly;
*otherwise a missing gen_edu would be coded as the top category
replace generaledmod=6 if generaledmod>9 & generaledmod<.
replace generaledmod=. if age<18 | age==. | sex==.

*hh type group religion
replace hhgroup=9 if hhgroup==3
replace religion=9 if religion==0


* create round when it does not exist yet (capture ignores the error when it does)
capture gen round=55


rename status_wa w_status

* final variable list, written with line continuations instead of a ; delimiter
keep round sector subround subsample region fsu hhno segment substratum srl_n_m districtcode /*identifier vars*/ ///
	hhsize hhtype religion hhgroup land pce /*demographic vars*/ ///
	rel_to_hd sex age generaledmod ///
	sick ///
	hh_nic98 w_nic98 pr_nic98 sub_nic98 ///
	hh_nic w_nic pr_nic sub_nic ///
	w_status pr_status sub_status ///
	hh_nco w_nco pr_nco sub_nco ///
	tot_day1-tot_day4 status_da1-status_da4 inddivcoded1-inddivcoded4 opcode1-opcode4 wage_tot1-wage_tot4 /*activity vars*/ ///
	mult /*multiplier vars*/ ///
	domduty-assreq /*domestic variables*/ ///
	grains pulses milk oils veg fru meat food_other ///
	intox light entertain education medical nondurables transport rent ///
	services clothing durables tax ///
	totexp xfood wfood wgrains ///
	publicworks* /*consumption variables*/

* force the value in case round already existed with other values
capture replace round=55

save "`rootdir'round55_sch10pers", replace



****Round 61*********

cd "`rootdir'\round61\"



//Load level 1 and append
* the three land variables are divided by 1000 in every sub-file.
* first sub-file: filter to level 1, then rescale
infile using "61_lvl_1_ah12.dct", clear
keep if level == 1
foreach landvar in hhlando hhlandp hhlandc {
	replace `landvar'=`landvar'/1000
}
save "61_lvl_1_ah12.dta", replace

* remaining sub-files: rescale, then filter to level 1
foreach sub in 22 32 42 52 62 72 82 {
	infile using "61_lvl_1_ah`sub'.dct", clear
	foreach landvar in hhlando hhlandp hhlandc {
		replace `landvar'=`landvar'/1000
	}
	keep if level == 1
	save "61_lvl_1_ah`sub'.dta", replace
}

* stack the eight sub-files, starting from ah12
use "61_lvl_1_ah12.dta", clear
foreach sub in 22 32 42 52 62 72 82 {
	append using "61_lvl_1_ah`sub'.dta"
}

drop blank filler charstmp
save "61lvl1.dta", replace

//Load level 2 and append
* one pass per sub-file: read the dictionary, keep the level-2 records, save
foreach sub in 12 22 32 42 52 62 72 82 {
	infile using "61_lvl_2_ah`sub'.dct", clear
	keep if level == 2
	save "61_lvl_2_ah`sub'.dta", replace
}

* stack the sub-files, starting from ah12
use "61_lvl_2_ah12.dta", clear
foreach sub in 22 32 42 52 62 72 82 {
	append using "61_lvl_2_ah`sub'.dta"
}

drop blank filler charstmp
save "61lvl2.dta", replace

//Load level 3 and append
* one pass per sub-file: read the dictionary, keep the level-3 records, save
foreach sub in 12 22 32 42 52 62 72 82 {
	infile using "61_lvl_3_ah`sub'.dct", clear
	keep if level == 3
	save "61_lvl_3_ah`sub'.dta", replace
}

* stack the sub-files, starting from ah12
use "61_lvl_3_ah12.dta", clear
foreach sub in 22 32 42 52 62 72 82 {
	append using "61_lvl_3_ah`sub'.dta"
}

*gen_edu current_attend

drop blank filler charstmp
save "61lvl3.dta", replace

//Load level 4 and append
* one pass per sub-file: read the dictionary, keep the level-4 records, save
foreach sub in 12 22 32 42 52 62 72 82 {
	infile using "61_lvl_4_ah`sub'.dct", clear
	keep if level == 4
	save "61_lvl_4_ah`sub'.dta", replace
}

* stack the sub-files, starting from ah12
use "61_lvl_4_ah12.dta", clear
foreach sub in 22 32 42 52 62 72 82 {
	append using "61_lvl_4_ah`sub'.dta"
}

drop blank filler charstmp
save "61lvl4.dta", replace

*use_elect volunt_nic

//Load level 5 and append
* one pass per sub-file: read the dictionary, keep the level-5 records, save
foreach sub in 12 22 32 42 52 62 72 82 {
	infile using "61_lvl_5_ah`sub'.dct", clear
	keep if level == 5
	save "61_lvl_5_ah`sub'.dta", replace
}

* stack the sub-files, starting from ah12
use "61_lvl_5_ah12.dta", clear
foreach sub in 22 32 42 52 62 72 82 {
	append using "61_lvl_5_ah`sub'.dta"
}

drop blank filler charstmp
save "61lvl5.dta", replace

*sub_num_workers

//Load level 6 and append
* one pass per sub-file: read the dictionary, keep the level-6 records, save
foreach sub in 12 22 32 42 52 62 72 82 {
	infile using "61_lvl_6_ah`sub'.dct", clear
	keep if level == 6
	save "61_lvl_6_ah`sub'.dta", replace
}

* stack the sub-files, starting from ah12
use "61_lvl_6_ah12.dta", clear
foreach sub in 22 32 42 52 62 72 82 {
	append using "61_lvl_6_ah`sub'.dta"
}

drop blank filler charstmp
save "61lvl6.dta", replace


*tot_day

//Load level 11 and append
* one pass per sub-file: read the dictionary, keep the level-11 records, save
foreach sub in 12 22 32 42 52 62 72 82 {
	infile using "61_lvl_11_ah`sub'.dct", clear
	keep if level == 11
	save "61_lvl_11_ah`sub'.dta", replace
}

* stack the sub-files, starting from ah12
use "61_lvl_11_ah12.dta", clear
foreach sub in 22 32 42 52 62 72 82 {
	append using "61_lvl_11_ah`sub'.dta"
}

drop blank filler charstmp

//reshape level 11: one row per household, one column per consumption item serial (srl_n_c)
drop cnt_rnd_shft

*segment is hamlet here

egen id = group(sector subround subsample statereg61 fsu hamlet ssstrat hhno)
rename valcons30 valcons30_
rename valcons365 valcons365_
* NOTE(review): the stubs below omit the trailing underscore added by the renames above;
* this presumably relies on variable-name abbreviation - confirm
reshape wide valcons30 valcons365, i(id) j(srl_n_c)

//aggregate consumption data and convert annual to monthly
* NOTE(review): "+" returns missing when any term is missing, so a household without a row for
* one item of a multi-item category gets a missing category value - confirm this is intended
local monthly (30/365)

* 30-day items
gen grains = valcons30_1
gen pulses = valcons30_2
gen milk = valcons30_3 + valcons30_4
gen oils = valcons30_5
gen veg = valcons30_6
gen fru = valcons30_7
gen meat = valcons30_8
gen sugar = valcons30_9 + valcons30_10
gen proc = valcons30_11		//srl no. 11 not exclusively processed food (other)...
gen intox = valcons30_12
gen light = valcons30_13
gen entertain = valcons30_14
gen nondurables = valcons30_15 + valcons30_16 + valcons30_17
gen services = valcons30_18 //??? consumer services
gen transport = valcons30_19	//includes fuel costs...
gen rent = valcons30_20
gen taxes = valcons30_21

* 365-day items, scaled by 30/365
gen medical = valcons30_22 + (valcons365_24 * `monthly')
gen education = (valcons365_25 + valcons365_26) * `monthly'
gen clothing = valcons365_27 * `monthly'
gen footwear = valcons365_28 * `monthly'
* durables: items 29 to 37, added in serial order
local dursum valcons365_29
forvalues item=30/37 {
	local dursum `dursum' + valcons365_`item'
}
gen durables = (`dursum') * `monthly'

drop valcons*

save "61lvl11.dta", replace




//Load level 10 and append
* one pass per sub-file: read the dictionary, keep the level-10 records, save
foreach sub in 12 22 32 42 52 62 72 82 {
	infile using "61_lvl_10_ah`sub'.dct", clear
	keep if level == 10
	save "61_lvl_10_ah`sub'.dta", replace
}

* stack the sub-files, starting from ah12
use "61_lvl_10_ah12.dta", clear
foreach sub in 22 32 42 52 62 72 82 {
	append using "61_lvl_10_ah`sub'.dta"
}

drop blank 
* remove exact duplicate rows, then, where a person still has several rows,
* remove those with a missing domreas2
duplicates drop
bysort sector subround subsample statereg61 fsu hamlet ssstrat hhno srl_n_m : gen counter=_N
drop if counter>1 & domreas2==.
drop counter

save "61lvl10.dta", replace






*valcons30 valcons365

//merge levels
* start from the level 6 file; levels 1 and 11 join on the household key,
* levels 3, 4, 5 and 10 on the household key plus srl_n_m
local hhkey61 sector subround subsample statereg61 fsu hamlet ssstrat hhno

use "61lvl6.dta", clear

merge m:1 `hhkey61' using "61lvl1.dta"
tab _merge
drop _merge
foreach lvl in 3 4 5 10 {
	merge m:1 `hhkey61' srl_n_m using "61lvl`lvl'.dta"
	tab _merge
	drop _merge
}
merge m:1 `hhkey61' using "61lvl11.dta"
tab _merge
drop _merge


rename pblworks_male publicworks_male
rename pblworks_fem publicworks_female

//rename vars for consistency
rename rel_to_head rel_to_hd

* strip the round suffix from the code variables
foreach v in w_nic w_nco opcode pr_nic pr_nco sub_nic sub_nco inddivcoded hh_nic hh_nco {
	rename `v'61 `v'
}




//make w_nco constant within each individual, then combine that individual's activities into a single observation
egen hhid = group(sector subround subsample statereg61 fsu hamlet ssstrat hhno)
cap: drop id


egen id = group(sector subround subsample statereg61 fsu hamlet ssstrat hhno srl_n_m)
sort id w_nco
*carry the first w_nco (in sort order) down all rows of the same id. replace works through the
*observations in order and sees the values it has already written, so one pass is sufficient
*(the statement used to be issued twice; the second pass changed nothing)
replace w_nco=w_nco[_n-1] if (id == id[_n-1] & (w_nco != w_nco[_n-1]))
//egen id = group(statereg61 district fsu hamlet ssstrat hhno srl_n_m)

duplicates drop id srl_n_a, force

***relabel activities, dropping zeros
*number each person's activities by days worked, most days first (activity 1 = most days).
*rows with equal days are ordered by the original activity serial: a plain "sort id tot_day"
*leaves ties in arbitrary order, so the numbering could change from run to run.
replace tot_day=0 if tot_day==.
tempvar negdays
gen double `negdays'=-tot_day
sort id `negdays' srl_n_a
by id: gen counter=_n
drop `negdays'


replace tot_day=. if tot_day==0

replace srl_n_a=counter
drop counter

drop cnt_rnd_shft a1 a2 a3 a4 a5 a6 a7 paymode dayswork

reshape wide status_da opcode inddivcoded tot_day ///
			 wage_cash wage_kind wage_tot, i(id) j(srl_n_a)

			 
			 
		
			 
			 
			 
* multiplier: mlt_sr divided by 400
gen mult=mlt_sr/400
capture gen round=61

rename statereg61 region
* substratum must be moved out of the way before ssstrat can take its name
rename substratum stratum2
rename ssstrat substratum
rename hamlet segment
rename hhsz hhsize
rename hhreligion religion
rename district districtcode

* land: copy of hhlandp capped at 99.99 (missing values are left alone)
gen land=hhlandp
replace land=99.99 if land>99.99 & land~=.



//CHANGE THE FOLLOWING TO ADD MORE VARIABLES (YOU MAY NEED TO MERGE WITH ADDITIONAL RAW DATA FILES FOR SOME VARIABLES)
//Keep variables of interest
keep round sector subround subsample region fsu substratum segment hhno srl_n_m districtcode ///
	hhsize land pce hhtype religion hhgroup ///
	hh_nic hh_nco ///
	rel_to_hd sex age gen_edu ///
	status_wa pr_status sub_status ///
	w_nic w_nco pr_nic pr_nco sub_nic sub_nco ///
	status_da1-wage_tot5 /*activity vars*/ ///
	mult /*multiplier vars*/ ///
	domduty-assreq ///
	grains-durables ///
	publicworks*
	



gen tax=0

* fold footwear into clothing, counting a missing value of either as zero
foreach v in clothing footwear {
	replace `v'=0 if `v'==.
}
replace clothing=clothing+footwear
drop footwear

* food_other = processed food + sugar, counting a missing value of either as zero
foreach v in proc sugar {
	replace `v'=0 if `v'==.
}
gen food_other=proc+sugar
drop proc sugar


**expenditure and household size consistency checks
* missing category values are set to zero before the totals are formed
local spendcats durables education entertain fru grains intox light meat medical milk nondurables oils pulses services transport veg food_other clothing
foreach cat of local spendcats {
	replace `cat'=0 if `cat'==.
}

capture drop totexp
capture drop xfood

* total expenditure, food expenditure, food share of the total, grain share of food
gen totexp=durables+education+entertain+fru+grains+intox+light+meat+medical+milk+nondurables+oils+pulses+services+transport+veg+food_other+clothing
gen xfood=fru+grains+meat+milk+oils+pulses+veg+food_other
gen wfood=xfood/totexp
gen wgrains=grains/xfood


capture drop _merge

***day re-scaling/cleaning
***almost no one with more than four activities, we will censor it there and rescale days
* removes every variable whose name contains a 5, 6 or 7; capture ignores the error when none match
foreach digit in 5 6 7 {
	capture drop *`digit'*
}
forvalues k=1/4 {
	replace tot_day`k'=0 if tot_day`k'==.
}
* rescale so the four activities sum to 7 days;
* a row whose four day counts sum to zero ends up missing (division by zero yields missing)
gen totaldays=tot_day1+tot_day2+tot_day3+tot_day4
forvalues k=1/4 {
	replace tot_day`k'=tot_day`k'*(7/totaldays)
}
drop totaldays



*convert each NIC98 code variable to NIC87: look up the first 4, then 3, then 2 characters in
*the concordance; the original code is kept as <name>98 and <name> becomes the first two
*characters of the matched nic87.
*keep(master match) stops merge from appending concordance rows that match no observation;
*without it every lookup added empty observations to the person file.
*NOTE(review): a later (shorter-prefix) match overwrites an earlier (longer-prefix) match - confirm this precedence is intended
foreach j in hh_nic w_nic pr_nic sub_nic{
gen temp`j'=""
foreach len in 4 3 2{
gen nic98=substr(`j',1,`len')
merge m:1 nic98 using "`rootdir'nic98_87", keep(master match)
replace temp`j'=nic87 if _merge==3
drop _merge nic98 nic87
}
ren `j' `j'98
gen `j'=substr(temp`j',1,2)
*drop temp`j'
}

*individual activity industry codes -need to convert
*keep(master match) prevents unmatched concordance rows from being appended as empty observations

forvalues j=1(1)4{
ren inddivcoded`j' nic98
merge m:1 nic98 using "`rootdir'nic98_87_div", keep(master match)
drop _merge nic98
ren nic87 inddivcoded`j'
destring inddivcoded`j', replace
*replace inddivcoded`j'=floor(inddivcoded`j'/10)
}




*opcodes
* codes 10 through 15 each move down by one (10->9, ..., 15->14); all other values are untouched
forvalues k=1/4 {
	replace opcode`k'=opcode`k'-1 if inlist(opcode`k',10,11,12,13,14,15)
}

* sick flag from the weekly status, set before the status codes are collapsed below
gen sick=inlist(status_wa,61,71,98)


*status code conversion
* 12->11, 62->61, 72->71, 98 and 99 ->97
foreach statvar in status_da1 status_da2 status_da3 status_da4 status_wa pr_status sub_status {
	recode `statvar' (12=11) (62=61) (72=71) (98 99=97)
}

**education
* collapse gen_edu into categories 0-6; the steps run in ascending order so that a value
* recoded by one step is not picked up again by a later one
gen generaledmod=gen_edu
replace generaledmod=0 if generaledmod==1
replace generaledmod=1 if inlist(generaledmod,2,3,4)
replace generaledmod=2 if generaledmod==5
replace generaledmod=3 if generaledmod==6
replace generaledmod=4 if generaledmod==7
replace generaledmod=5 if inlist(generaledmod,8,10)
replace generaledmod=6 if generaledmod~=. & generaledmod>10
replace generaledmod=. if age<18 | age==. | sex==.


*hh type group
replace hhgroup=9 if hhgroup==3




rename status_wa w_status
* create round when it does not exist yet (capture ignores the error when it does)
capture gen round=61


* final variable list, written with line continuations instead of a ; delimiter
keep round sector subround subsample region fsu segment substratum hhno srl_n_m districtcode /*identifier vars*/ ///
	hhsize hhtype religion hhgroup land pce /*demographic vars*/ ///
	rel_to_hd sex age generaledmod ///
	sick ///
	hh_nic98 w_nic98 pr_nic98 sub_nic98 ///
	hh_nic w_nic pr_nic sub_nic ///
	w_status pr_status sub_status ///
	hh_nco w_nco pr_nco sub_nco ///
	tot_day1-tot_day4 status_da1-status_da4 inddivcoded1-inddivcoded4 opcode1-opcode4 wage_tot1-wage_tot4 /*activity vars*/ ///
	mult /*multiplier vars*/ ///
	domduty-assreq /*domestic variables*/ ///
	grains pulses milk oils veg fru meat food_other ///
	intox light entertain education medical nondurables transport rent ///
	services clothing durables tax ///
	totexp xfood wfood wgrains ///
	publicworks* /*consumption variables*/

* force the value in case round already existed with other values
capture replace round=61

save "`rootdir'round61_sch10pers", replace




*********Round 66**************

cd "`rootdir'\round66\"



* read levels 4, 2, 5, 6 and 7 (same order as before); each dictionary is read, the listed
* variables are dropped, and the result is saved as lv<k>.dta
foreach k in 4 2 5 6 7 {
	infile using "lv`k'.dct", clear
	//drop blank vars and those without any variation
	drop blank charstmp filler1 filler2 sched
	* level 2 only: the three land variables are divided by 1000
	if `k'==2 {
		foreach landvar in hhlando hhlandp hhlandc {
			replace `landvar'=`landvar'/1000
		}
	}
	save "lv`k'.dta", replace
}

infile using "lv12.dct", clear
//drop blank vars and those without any variation
drop blank charstmp filler1 filler2 sched
* intermediate save; the same file is overwritten at the end of this section
save "lv12.dta", replace

//reshape level 12: one row per household, one column per consumption item serial (srl_n_c)
drop rnd_cntr
egen id = group(sector subround subsample statereg66 fsu hamlet ssstrat hhno)
rename valcons30 valcons30_
rename valcons365 valcons365_
* NOTE(review): the stubs below omit the trailing underscore added by the renames above;
* this presumably relies on variable-name abbreviation - confirm
reshape wide valcons30 valcons365, i(id) j(srl_n_c)

//aggregate consumption data and convert annual to monthly
* NOTE(review): "+" returns missing when any term is missing, so a household without a row for
* one item of a multi-item category gets a missing category value - confirm this is intended
local monthly (30/365)

* 30-day items
gen grains = valcons30_1
gen pulses = valcons30_2
gen milk = valcons30_3 + valcons30_4
gen oils = valcons30_5
gen veg = valcons30_6
gen fru = valcons30_7
gen meat = valcons30_8
gen sugar = valcons30_9 + valcons30_10
gen proc = valcons30_11		//srl no. 11 not exclusively processed food (other)...
gen intox = valcons30_12
gen light = valcons30_13
gen entertain = valcons30_14
gen nondurables = valcons30_15 + valcons30_16 + valcons30_17
gen services = valcons30_18 //??? consumer services
gen transport = valcons30_19	//includes fuel costs...
gen rent = valcons30_20
gen taxes = valcons30_21

* 365-day items, scaled by 30/365
gen medical = valcons30_22 + (valcons365_24 * `monthly')
gen education = (valcons365_25 + valcons365_26) * `monthly'
gen clothing = valcons365_27 * `monthly'
gen footwear = valcons365_28 * `monthly'
* durables: items 29 to 37, added in serial order
local dursum valcons365_29
forvalues item=30/37 {
	local dursum `dursum' + valcons365_`item'
}
gen durables = (`dursum') * `monthly'

drop valcons*

save "lv12.dta", replace


* read level 11, drop the variable blank, and save it for the merge below
infile using "lv11.dct", clear
drop blank
save "lv11.dta", replace



use "lv7.dta", clear

//merge
* start from the level 7 file; levels 2 and 12 join on the household key, levels 4, 5, 6 and 11
* on the household key plus srl_n_m. the level 5 merge used to list hhno twice in its key;
* building every key from the same macro removes that slip.
local hhkey66 sector subround subsample statereg66 fsu hamlet ssstrat hhno
local perskey66 `hhkey66' srl_n_m

merge m:1 `perskey66' using "lv4.dta"
tab _merge
drop _merge
merge m:1 `hhkey66' using "lv2.dta"
tab _merge
drop _merge
foreach k in 5 6 11 {
	merge m:1 `perskey66' using "lv`k'.dta"
	tab _merge
	drop _merge
}
merge m:1 `hhkey66' using "lv12.dta"
tab _merge
drop _merge

//rename vars for consistency
* strip the round suffix from each variable below
foreach v in w_nic w_nco opcode pr_nic pr_nco sub_nic sub_nco inddivcoded hh_nic hh_nco sub_status {
	rename `v'66 `v'
}





//combine multiple activities per individual into single observation
egen hhid = group(sector subround subsample statereg66 fsu hamlet ssstrat hhno)
cap: drop id
egen id = group(sector subround subsample statereg66 fsu hamlet ssstrat hhno srl_n_m)


duplicates drop id srl_n_a, force

***relabel activities, dropping zeros
*number each person's activities by days worked, most days first (activity 1 = most days).
*rows with equal days are ordered by the original activity serial: a plain "sort id tot_day"
*leaves ties in arbitrary order, so the numbering could change from run to run.
replace tot_day=0 if tot_day==.
tempvar negdays
gen double `negdays'=-tot_day
sort id `negdays' srl_n_a
by id: gen counter=_n
drop `negdays'


replace tot_day=. if tot_day==0

replace srl_n_a=counter
drop counter

*a5 was listed twice in this drop; each variable now appears once
drop rnd_cntr a7 a6 a5 a4 a3 a2 a1 paymode dayswork

reshape wide status_da opcode inddivcoded tot_day ///
			 wage_cash wage_kind wage_tot, i(id) j(srl_n_a)

		
gen match=1
replace match=2 if NSS~=NSC	
gen mult=MLT/(match*100)


ren hhsz hhsize
ren statereg66 region
ren substratum substratum2
ren ssstrat substratum
ren hamlet segment
gen land=hhlandp
replace land=99.99 if land>99.99 & land~=.
ren hhreligion religion
ren hhtypecode hhtype

gen pce=.

cap: ren district districtcode

*first trim of the person file: identifiers, household characteristics,
*person characteristics, activity variables 1-4, multiplier, and the
*domestic-duty and consumption variable ranges
keep round sector subround subsample region fsu substratum segment hhno srl_n_m districtcode ///
	hhsize land pce hhtype religion hhgroup ///
	hh_nic hh_nco ///
	rel_to_hd sex age gen_edu ///
	status_wa pr_status sub_status ///
	w_nic w_nco pr_nic pr_nco sub_nic sub_nco ///
	status_da1-wage_tot4 /*activity vars*/ ///
	mult /*multiplier vars*/ ///
	domduty-assreq ///
	grains-durables *nreg*
	

rename taxes tax

*missing components count as zero; footwear is folded into clothing and
*proc plus sugar become food_other
foreach v in clothing footwear proc sugar {
	replace `v'=0 if `v'==.
}
replace clothing=clothing+footwear
gen food_other=proc+sugar
drop footwear proc sugar



**expenditure and household size consistency checks
*every expenditure item entering the total has missing set to zero
local expitems durables education entertain fru grains intox light meat medical milk nondurables oils pulses services transport veg food_other clothing
foreach v of local expitems {
	replace `v'=0 if `v'==.
}


capture drop totexp
capture drop xfood
*total expenditure is the sum of the items above, added in the listed order
local totsum : subinstr local expitems " " "+", all
gen totexp=`totsum'


*food expenditure and the food and grain shares
local fooditems fru grains meat milk oils pulses veg food_other
local foodsum : subinstr local fooditems " " "+", all
gen xfood=`foodsum'
gen wfood=xfood/totexp
gen wgrains=grains/xfood





capture drop _merge
***day re-scaling/cleaning
***almost no one with more than four activities, we will censor it there and rescale days
*NOTE(review): each wildcard drops every variable whose name contains that digit, not only activity variables
foreach d in 5 6 7 {
	capture drop *`d'*
}
*missing days count as zero when summing
forvalues k=1/4 {
	replace tot_day`k'=0 if tot_day`k'==.
}
gen totaldays=tot_day1+tot_day2+tot_day3+tot_day4
*rescale so the four activities sum to 7 days; when totaldays is zero the
*division is missing and so are the rescaled days
forvalues k=1/4 {
	replace tot_day`k'=tot_day`k'*(7/totaldays)
}
drop totaldays

*first convert nic04 to nic98
*the original codes are kept under the suffix 04
*keep(master match) on every crosswalk merge below: without it, crosswalk codes
*that no person reports are appended to the data as extra, otherwise-empty observations
foreach j in hh_nic w_nic pr_nic sub_nic{
gen nic04=substr(`j',1,4)
ren `j' `j'04
merge m:1 nic04 using "`rootdir'nic04_98", keep(master match)
gen `j'=nic98
drop nic98 nic04 _merge
}
*then convert to 2-digit nic87
*the crosswalk is tried on the first 4, then 3, then 2 characters of the nic98 code
*NOTE(review): a later (coarser) match overwrites an earlier (finer) one - confirm this is intended
foreach j in hh_nic w_nic pr_nic sub_nic{
gen temp`j'=""
gen nic98=substr(`j',1,4)
merge m:1 nic98 using "`rootdir'nic98_87", keep(master match)
replace temp`j'=nic87 if _merge==3
drop _merge nic98 nic87
gen nic98=substr(`j',1,3)
merge m:1 nic98 using "`rootdir'nic98_87", keep(master match)
replace temp`j'=nic87 if _merge==3
drop _merge nic98 nic87
gen nic98=substr(`j',1,2)
merge m:1 nic98 using "`rootdir'nic98_87", keep(master match)
replace temp`j'=nic87 if _merge==3
drop _merge nic98 nic87
drop `j'
gen `j'=substr(temp`j',1,2)
*drop temp`j'
}




*occupation codes: map nco04 to nco68 through nco_cross, keeping the original under the suffix 04
foreach j in hh_nco w_nco pr_nco sub_nco{
gen nco04=`j'
ren `j' `j'04
sort nco04
merge m:1 nco04 using "`rootdir'nco_cross", keep(master match)
tab _merge
drop nco04 _merge
ren nco68 `j'
}


*individual activity industry codes
*2-digit nic98 to 1-digit nic87 through nic98_87_div, then stored as numbers
forvalues j=1(1)4{
ren inddivcoded`j' nic98
merge m:1 nic98 using "`rootdir'nic98_87_div", keep(master match)
drop _merge nic98
ren nic87 inddivcoded`j'
destring inddivcoded`j', replace
destring opcode`j', replace
}

*tab pr_nic
*tab pr_nic04
*tab hh_nco
*tab hh_nco04
*sick flag from the weekly status code, taken before the codes are collapsed below
gen sick=inlist(status_wa,61,71,98)

*status code conversion
*12, 42, 62, 72 and 98 each move down by one; 99 also becomes 97
foreach v in status_da1 status_da2 status_da3 status_da4 status_wa pr_status sub_status {
	replace `v'=`v'-1 if inlist(`v',12,42,62,72,98)
	replace `v'=97 if `v'==99
}


*fix opcodes
*codes of 10 and above shift down by one
forvalues k=1/4 {
	replace opcode`k'=opcode`k'-1 if opcode`k'>=10
}


***education
*collapse gen_edu into 0-6: 1 to 0, 2-4 to 1, 5 to 2, 6 to 3, 7 to 4, 8 and 10 to 5, above 10 to 6
*any other value is left as it is
gen generaledmod=gen_edu
#delimit ;
replace generaledmod=
	cond(generaledmod==1, 0,
	cond(inlist(generaledmod,2,3,4), 1,
	cond(generaledmod==5, 2,
	cond(generaledmod==6, 3,
	cond(generaledmod==7, 4,
	cond(inlist(generaledmod,8,10), 5,
	cond(generaledmod>10 & generaledmod~=., 6,
	generaledmod)))))))
	;
#delimit cr
*only defined for persons aged 18 or more with known age and sex
replace generaledmod=. if age<18 | age==. | sex==.


*hh type group
replace hhgroup=9 if hhgroup==3




rename status_wa w_status
capture gen round=66


*final variable set of the round 66 person file
keep round sector subround subsample region fsu segment substratum hhno srl_n_m districtcode /*identifier vars*/ ///
	hhsize hhtype religion hhgroup land pce /*demographic vars*/ ///
	rel_to_hd sex age generaledmod ///
	sick ///
	hh_nic04 w_nic04 pr_nic04 sub_nic04 ///
	hh_nic w_nic pr_nic sub_nic ///
	w_status pr_status sub_status ///
	hh_nco w_nco pr_nco sub_nco ///
	hh_nco04 w_nco04 pr_nco04 sub_nco04 ///
	tot_day1-tot_day4 status_da1-status_da4 inddivcoded1-inddivcoded4 opcode1-opcode4 wage_tot1-wage_tot4 /*activity vars*/ ///
	mult /*multiplier vars*/ ///
	domduty-assreq /*domestic variables*/ ///
	grains pulses milk oils veg fru meat food_other ///
	intox light entertain education medical nondurables transport rent ///
	services clothing durables tax ///
	totexp xfood wfood wgrains *nreg* /*consumption variables*/

*make sure every observation carries the round number
capture replace round=66

save "`rootdir'round66_sch10pers", replace










***Part 2: defining additional variables
***HH-level education  and demographic variables
***Adult male and female shares by 1-digit sectors (industry and occupation, usual and subsidiary)
**conditional on working -- want to exclude the non-workers
***Adult male and female shares by current weekly activity (intensive margin)-- most immediate cal-reqs
***Adult male and female shares by operation codes as well?
**Adult female shares working in various home production activities


**share in manual labor (using NCO codes?)
**share in domestic activities?

***See analysis.do

*work from the root data directory (quoted, as at the top of the file, so a path with spaces works)
cd "`rootdir'"

*variables that jointly identify a household
local hhid round sector subround subsample region fsu substratum segment hhno

*for each round: load the person file, build household-level composition,
*education, activity and home-production shares, keep one row per household
*and save it as the sch10hh file of that round
foreach round in 38 43 55 61 66{
	use round`round'_sch10pers, clear


	*hhsize_den is the number of person records found for the household;
	*hhconsistent flags households where it equals the reported hhsize
	bysort `hhid': gen hhsize_den=_N
	drop if hhsize==0
	gen hhconsistent=0
	replace hhconsistent=1 if hhsize==hhsize_den & hhsize_den~=. & hhsize~=.
	tab hhconsistent

	***Demographics
	***Note there are some missing sex and age cells. Exclude these from any hh construction.

	*one male and one female indicator per age group:
	*a adults, s seniors, b babies, c children, p primary, m middle, y youths
	foreach j in a s b c p m y{
		gen `j'male=0
		gen `j'female=0
	}

	*adults
	replace amale=1 if sex==1 & age>17 & age<60
	replace afemale=1 if sex==2 & age>17 & age<60
	bysort `hhid': egen numamale=total(amale)
	bysort `hhid': egen numafemale=total(afemale)

	*seniors
	replace smale=1 if sex==1 & age>=60 & age<.
	replace sfemale=1 if sex==2 & age>=60 & age<.
	bysort `hhid': egen numsmale=total(smale)
	bysort `hhid': egen numsfemale=total(sfemale)


	*babies
	replace bmale=1 if sex==1 & age<=2
	replace bfemale=1 if sex==2 & age<=2
	bysort `hhid': egen numbmale=total(bmale)
	bysort `hhid': egen numbfemale=total(bfemale)

	*children 3-4
	replace cmale=1 if sex==1 & age>2 & age<5
	replace cfemale=1 if sex==2 & age>2 & age<5
	bysort `hhid': egen numcmale=total(cmale)
	bysort `hhid': egen numcfemale=total(cfemale)

	* 5-9 --primary school age
	replace pmale=1 if sex==1 & age>4 & age<10
	replace pfemale=1 if sex==2 & age>4 & age<10
	bysort `hhid': egen numpmale=total(pmale)
	bysort `hhid': egen numpfemale=total(pfemale)

	*10-14 --middle school age
	replace mmale=1 if sex==1 & age>9 & age<15
	replace mfemale=1 if sex==2 & age>9 & age<15
	bysort `hhid': egen nummmale=total(mmale)
	bysort `hhid': egen nummfemale=total(mfemale)

	*15-17 --youths
	replace ymale=1 if sex==1 & age>14 & age<18
	replace yfemale=1 if sex==2 & age>14 & age<18
	bysort `hhid': egen numymale=total(ymale)
	bysort `hhid': egen numyfemale=total(yfemale)


	*share of each sex and age group in the counted household members
	foreach j in a s b c p m y{
		gen dmratio`j'=num`j'male/hhsize_den
		gen dfratio`j'=num`j'female/hhsize_den
		replace dmratio`j'=0 if dmratio`j'==.
		replace dfratio`j'=0 if dfratio`j'==.
	}




	***Generate education shares
	***education conversion - do we want years of schooling for children?
	replace generaledmod=. if age<18 | age==. | sex==.

	*0 - not literate
	*1 - literate but no formal schooling
	*2 - literate but below primary
	*3 - primary
	*4 - middle
	*5 - secondary
	*6 - university


	*NOTE(review): edmale_den and edfemale_den are not used below; the ratios divide by hhsize_den
	bysort `hhid': egen edmale_den=total(amale+smale) if generaledmod~=.
	bysort `hhid': egen edfemale_den=total(afemale+sfemale) if generaledmod~=.

	***Fraction of male, female working age adults with education levels 0-6
	*the count is computed on the records at that level and then spread to the
	*whole household with min(); households with nobody at the level get 0
	forvalues j=0(1)6{
		bysort `hhid': egen nummale_ed`j'=total(amale+smale) if generaledmod==`j'
		bysort `hhid': egen numfemale_ed`j'=total(afemale+sfemale) if generaledmod==`j'
		bysort `hhid': egen edmaleratio`j'=min(nummale_ed`j')
		bysort `hhid': egen edfemaleratio`j'=min(numfemale_ed`j')

		replace edmaleratio`j'=edmaleratio`j'/hhsize_den
		replace edfemaleratio`j'=edfemaleratio`j'/hhsize_den
		replace edmaleratio`j'=0 if edmaleratio`j'==.
		replace edfemaleratio`j'=0 if edfemaleratio`j'==.
	}





	*years of schooling assigned to each education level
	gen yschooling=.
	*middle school grades 6-8
	replace yschooling=0 if generaledmod==0
	replace yschooling=0 if generaledmod==1
	replace yschooling=(11-5)/2 if generaledmod==2
	replace yschooling=(11-5) if generaledmod==3
	replace yschooling=(14-5) if generaledmod==4
	replace yschooling=(18-5) if generaledmod==5
	replace yschooling=(22-5) if generaledmod==6
	***mean years of adult schooling
	bysort `hhid': egen meana_school=mean(yschooling)




	***Household head
	gen head=0
	replace head=1 if rel_to_hd==1

	gen female=0
	replace female=1 if sex==2
	gen male=0
	replace male=1 if sex==1
	bysort `hhid': egen nummale=total(male)
	bysort `hhid': egen numfemale=total(female)


	*head characteristics are read from the head record only, so they are missing
	*(rather than 0) when no head is recorded or the head value itself is missing
	bysort `hhid': egen headed=max(cond(head==1,generaledmod,.))
	bysort `hhid': egen headfemale=max(cond(head==1,female,.))
	bysort `hhid': egen headage=max(cond(head==1,age,.))
	drop head


	***Economically active?
	*active means a status code below 60 (a missing status is not below 60)
	gen w_active=0
	replace w_active=1 if w_status<60
	gen pr_active=0
	replace pr_active=1 if pr_status<60
	gen sub_active=0
	replace sub_active=1 if sub_status<60

	**share of household that is economically active (including children here)
	foreach j in w pr sub{
		bysort `hhid': egen sharem`j'_active=total(`j'_active*male)
		replace sharem`j'_active=sharem`j'_active/hhsize_den
		bysort `hhid': egen sharef`j'_active=total(`j'_active*female)
		replace sharef`j'_active=sharef`j'_active/hhsize_den
		replace sharem`j'_active=0 if sharem`j'_active==.
		replace sharef`j'_active=0 if sharef`j'_active==.
	}


	***Active male and female shares by 1-digit sectors (industry and occupation, primary, subsidiary and weekly)




	foreach k in pr sub w{

		*first digit of the industry code (the code divided by 10)
		destring `k'_nic, gen(`k'_nic1) force
		replace `k'_nic1=floor(`k'_nic1/10)


		forvalues j=0(1)9{
			gen `k'_nic1dum`j'=0
			replace `k'_nic1dum`j'=1 if `k'_nic1==`j'
			bysort `hhid': egen sharem`k'_nic`j'=total(`k'_active*male*`k'_nic1dum`j')
			replace sharem`k'_nic`j'=sharem`k'_nic`j'/hhsize_den
			bysort `hhid': egen sharef`k'_nic`j'=total(`k'_active*female*`k'_nic1dum`j')
			replace sharef`k'_nic`j'=sharef`k'_nic`j'/hhsize_den
			replace sharem`k'_nic`j'=0 if sharem`k'_nic`j'==.
			replace sharef`k'_nic`j'=0 if sharef`k'_nic`j'==.
		}


		*first digit of the occupation code (the code divided by 100)
		destring `k'_nco, gen(`k'_nco1) force
		replace `k'_nco1=floor(`k'_nco1/100)

		forvalues j=0(1)9{
			gen `k'_nco1dum`j'=0
			replace `k'_nco1dum`j'=1 if `k'_nco1==`j'
			bysort `hhid': egen sharem`k'_nco`j'=total(`k'_active*male*`k'_nco1dum`j')
			replace sharem`k'_nco`j'=sharem`k'_nco`j'/hhsize_den
			bysort `hhid': egen sharef`k'_nco`j'=total(`k'_active*female*`k'_nco1dum`j')
			replace sharef`k'_nco`j'=sharef`k'_nco`j'/hhsize_den
			replace sharem`k'_nco`j'=0 if sharem`k'_nco`j'==.
			replace sharef`k'_nco`j'=0 if sharef`k'_nco`j'==.
		}
	}



	*d_active is the number of days spent in activities with a status code below 60
	gen d_active=0
	forvalues j=1(1)4{
		gen d_active`j'=0
		replace d_active`j'=1 if status_da`j'<60
		replace d_active=d_active+(d_active`j'*tot_day`j')

		*setting operation codes to missing for urban households, should not exist
		replace opcode`j'=. if sector==2
	}

	*active days as a share of the household total of 7 days per counted member
	bysort `hhid': egen sharemd_active=total(male*d_active)
	replace sharemd_active=sharemd_active/(hhsize_den*7)
	bysort `hhid': egen sharefd_active=total(female*d_active)
	replace sharefd_active=sharefd_active/(hhsize_den*7)
	replace sharemd_active=0 if sharemd_active==.
	replace sharefd_active=0 if sharefd_active==.

	*active days by 1-digit industry of the activity
	forvalues j=0(1)9{
		gen d_nic1dum`j'=0
		forvalues k=1(1)4{
			gen d_nic1dum`j'_`k'=0
			replace d_nic1dum`j'_`k'=1 if inddivcoded`k'==`j'
			replace d_nic1dum`j'=d_nic1dum`j'+(tot_day`k'*d_active`k'*d_nic1dum`j'_`k')
		}
		bysort `hhid': egen sharemd_nic`j'=total(male*d_nic1dum`j')
		replace sharemd_nic`j'=sharemd_nic`j'/(hhsize_den*7)
		bysort `hhid': egen sharefd_nic`j'=total(female*d_nic1dum`j')
		replace sharefd_nic`j'=sharefd_nic`j'/(hhsize_den*7)
		replace sharemd_nic`j'=0 if sharemd_nic`j'==.
		replace sharefd_nic`j'=0 if sharefd_nic`j'==.
	}


	*active days by operation code 1-14
	forvalues j=1(1)14{
		gen d_opdum`j'=0

		forvalues k=1(1)4{
			gen d_opdum`j'_`k'=0
			replace d_opdum`j'_`k'=1 if opcode`k'==`j'
			replace d_opdum`j'=d_opdum`j'+(tot_day`k'*d_active`k'*d_opdum`j'_`k')
		}
		bysort `hhid': egen sharemd_op`j'=total(male*d_opdum`j')
		replace sharemd_op`j'=sharemd_op`j'/(hhsize_den*7)
		bysort `hhid': egen sharefd_op`j'=total(female*d_opdum`j')
		replace sharefd_op`j'=sharefd_op`j'/(hhsize_den*7)
		replace sharemd_op`j'=0 if sharemd_op`j'==.
		replace sharefd_op`j'=0 if sharefd_op`j'==.
	}




	****Share of those with status=93 in various home production activities  (adult females only?)
	**for now we'll leave out things like preparation of gur, sewing, tutoring, baskets, preserving food, and poultry but can add them in later...
	*the indicator is set for principal status 92 or 93 with the activity coded 1
	foreach j in garden collectfood collectwood huskpaddy grindgrain prepdung collectwater outsidewater{
		gen dum`j'=0
		replace dum`j'=1 if `j'==1 & (pr_status==92 | pr_status==93)

		bysort `hhid': egen sharem`j'=total(dum`j'*male)
		replace sharem`j'=sharem`j'/hhsize_den
		bysort `hhid': egen sharef`j'=total(dum`j'*female)
		replace sharef`j'=sharef`j'/hhsize_den
		replace sharem`j'=0 if sharem`j'==.
		replace sharef`j'=0 if sharef`j'==.
	}

	*create the identifiers as empty variables in rounds that do not have them
	cap: gen districtcode=.
	cap: gen stratum=.

	*one row per household; only household-level variables are kept below
	duplicates drop `hhid', force

	# delimit ;
	keep round sector subround stratum subsample region fsu segment substratum hhno districtcode /*identifier vars*/
	     hhsize  hhtype religion hhgroup land pce /*demographic vars*/
		 hh_nic hh_nco
		 /*variables now aggregated to household level */
		 hhsize_den dmratio* dfratio*
		 edmaleratio* edfemaleratio*
		 meana_school
		 headed headfemale headage
		 sharem* sharef*

	 	 mult /*multiplier vars*/
		 /*consumption variables*/
		 grains pulses milk oils veg fru meat food_other
		intox light entertain education medical nondurables transport rent
		services clothing durables tax
		totexp xfood wfood wgrains ;
	# delimit cr


	save round`round'_sch10hh, replace
}







****Prep: district level employment in food retail/distribution**** (numbers, and as share of pop.)



*variables that jointly identify a household
local hhid round sector subround subsample region fsu substratum segment hhno

*for each round: flag persons whose principal, subsidiary or weekly industry
*code is 60, 65 or 69 and save a slim person-level file
foreach rd in 38 43 55 61 66 {

	use round`rd'_sch10pers, clear

	*compare the reported household size with the number of person records
	bysort `hhid': gen hhsize_den=_N
	drop if hhsize==0
	gen hhconsistent=(hhsize==hhsize_den & hhsize_den~=. & hhsize~=.)
	tab hhconsistent

	*code 68 is deliberately left out, as in the disabled line of the earlier version
	gen food=inlist(pr_nic,"60","65","69") | inlist(sub_nic,"60","65","69") | inlist(w_nic,"60","65","69")

	capture gen districtcode=.
	keep round sector subround subsample region fsu segment substratum hhno districtcode /*identifier vars*/ ///
		hhsize food mult

	save round`rd'_sch10food, replace
}


